#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#define TAMLINHA 500
#define TAMPAL 50
#define TAMMPAL 100
#define TAMARQ 100
#define LIGAGRAVA 1
#define CONTINUAGRAVA 2
#define NAOGRAVA 3
#define DESLIGAGRAVA 0
#define ETIQUETAINDEFINIDA -1
#define PONTUACAODEFAULT -2
#define NAOGRAVAVEL -3
#define AREATEXTO 1
#define AREATAG 0

/* Lemma/tag pair referenced by a node of the accent-lexicon tree (see carac_lexico_acentos). */
typedef struct {
	/* lemma string */
	char *lema;
	/* definitive tag (index into etq_2letras) */
	int etq_def;
} carac_lexico_acentos_lema_etq;

/*
 * One character node of the accent-lexicon tree rooted at raiz_lexico_acentos.
 * Each node has three children (menor / igual / maior = smaller / equal / greater).
 * NOTE(review): this looks like a ternary search tree keyed on 'carac'; the
 * insert/search code is outside this view - confirm.
 */
typedef struct CARAC_lexico_acentos {
	/* character stored at this node */
	char carac;
	/* lemma/tag payload; presumably NULL when no word ends at this node - confirm */
	carac_lexico_acentos_lema_etq *lema_etq;
	struct CARAC_lexico_acentos *menor;
	struct CARAC_lexico_acentos *igual;
	struct CARAC_lexico_acentos *maior;
} carac_lexico_acentos;

/*
 * One character node of the locution (multi-word expression) tree rooted at
 * raiz_locucoes. Same three-child shape as carac_lexico_acentos.
 */
typedef struct CARAC_locucoes {
	/* character stored at this node */
	char carac;
	/* tag of the locution; pesq_locucoes reports ETIQUETAINDEFINIDA when nothing matches */
	int etq_def;
	struct CARAC_locucoes *menor;
	struct CARAC_locucoes *igual;
	struct CARAC_locucoes *maior;
} carac_locucoes;

/*
 * One character node of the suffix tree rooted at raiz_sufixos. It is searched
 * with the word reversed (see trata_sfx_tqAtual). The arrays have one slot per
 * tag (21 tags, see etq_2letras).
 */
typedef struct CARAC_SUFIXOS {
	/* character stored at this node */
	char carac;
	/* per-tag lemma information attached to the suffix */
	char *lema[21];
	/* per-tag score of the suffix */
	float etq[21];
	/* NOTE(review): presumably the tag for an exact (whole-word) match - confirm */
	int etq_exato;
	struct CARAC_SUFIXOS *menor;
	struct CARAC_SUFIXOS *igual;
	struct CARAC_SUFIXOS *maior;
} carac_sufixos;

/*
 * One token of the input text. Tokens live in a circular doubly linked list
 * that main builds once and then reuses as a sliding window.
 */
typedef struct TOQUEN {
	/* finished output line ("word lemma _TAG\n") printed by main */
	char saida[TAMLINHA];
	/* 1 when the first character changes under lower-casing (set in trata_Comb), else 0 */
	int maius;
	/* one of LIGAGRAVA / CONTINUAGRAVA / NAOGRAVA / DESLIGAGRAVA */
	int grava;
	/* surface form of the word */
	char palavra[TAMPAL];
	/* lower-cased word wrapped in single spaces (" word "), used for strstr list lookups */
	char epalavrae[TAMPAL];
	char lema[TAMPAL]; /* definitive lemma */
	int etq_def; /* definitive tag */
	char lema_sfx[21][TAMPAL]; /* lemma implied by the suffix, one per tag */
	float ant[21]; /* probability of each tag x for this token given the previous word: tqAtual->ant[x] = vizinho_posterior[tqAtual->prev->etq_def] */
	float etq_sfx[21]; /* probability of each tag x for this token given its suffix */
	float pos[21];  /* probability of each tag for this token given the next word: tqAtual->pos[x] = vizinho_anterior[tqAtual->prox->etq_def] */
	/* previous / next token in the ring */
	struct TOQUEN *prev;
	struct TOQUEN *prox;
} toquen;

/* functions */
/* NOTE(review): main also calls toquenizaTag(), which has no prototype in this list. */
/* accent lexicon tree - judging by the names: load / insert / search (definitions not visible here) */
void le_lexico_acentos ( );
void insere_arv_lexico_acentos (char *, int , char *, int, carac_lexico_acentos **);
void pesq_lexico_acentos (char *,int,char *,int *,carac_lexico_acentos *);
/* locution tree - same trio; pesq_locucoes returns a tag or ETIQUETAINDEFINIDA */
void le_locucoes ( );
void insere_arv_locucoes (char *, int , int, carac_locucoes **);
int pesq_locucoes (char *,int, carac_locucoes *);
/* suffix tree - same trio, plus the handler for the longest suffix found */
void le_sufixos ( );
void insere_arv_sufixos (char *, int , int, char *, carac_sufixos **);
void pesq_sufixos (char *,int,carac_sufixos *);
void trata_maior_sufixo (char *);
/* tagging stages applied by main to the tokens of the sliding window */
void trata_pre_tqAtual ();
void trata_loc_tqAtual ();
void trata_acent_tqAtual();
void trata_sfx_tqAtual ();
void trata_tqVz (toquen *);
int trata_Comb ();
void trata_tqPronto ();
/* splits one read token into two (contractions, hyphenated forms); defined outside this view */
void combinatracao(char *, char *, char *, int , char *, char *, char *, int);
/* writes the "word lemma _TAG" output line of a token */
void chama(toquen *);
/* reads the next token from stdin */
int toqueniza (toquen *);
void pegaIdDoc(char *);
/* in-place string helpers - judging by the names: lower-case with / without accent stripping */
void minusculasemacento (char *);
void minuscula (char *);
char letra_minuscula (char);
char letra_maiuscula (char);

/* global variables */
/* roots of the three lookup trees, passed to the pesq_* search functions */
carac_lexico_acentos *raiz_lexico_acentos;
carac_locucoes *raiz_locucoes;
carac_sufixos *raiz_sufixos;
/* reset to NULL before pesq_sufixos and tested afterwards in trata_sfx_tqAtual */
carac_sufixos *maior_sufixo;
/* last index of the lookup key scanned by trata_loc_tqAtual; presumably set by pesq_locucoes - confirm */
int maior_coluna;
/* per-tag results of the latest suffix search; cleared and then merged into the token by trata_sfx_tqAtual */
char lema_do_sufixo[21][TAMPAL];
float etq_do_sufixo[21];
long id_doc;
/* zeroed by main before the first token is read */
long numero_de_palavras;
/* copy of argv[1] (directory of the data files) */
char local_do_arquivo[TAMARQ];
/* sliding window over the token ring: finished token, two neighbourhood slots, current token, read slot */
toquen *tqPronto, *tqVz2, *tqVz1, *tqAtual, *tqLeitura;
/* fdocvoc is opened by main in append mode; fparaIdx is not used in the visible part of the file */
FILE *fparaIdx, *fdocvoc;
/* houveComb != 0: the next ring slot is already filled, so the next read is skipped */
int houveComb, primeiroToquen;
/* previous / current / look-ahead characters of the stdin scanner (toqueniza) */
char carLidoAnterior, carLidoAtual, carLidoDemais;
int areaTextoTag;
/* running hyphen count kept by toqueniza; cleared on a space */
int quantoshifens;
/* Printable label of each tag; the array index is the numeric tag stored in toquen.etq_def. */
/*                                        0        1      2      3        4       5      6       7       8       9      10     11     12     13    14    15    16      17      18     19     20   */
char etq_2letras[21][4] = {"_AD","_AI","_AJ","_AV","_CC","_CS","_IN","_NC","_NO","_AP","_PS","_PD","_PI","_PL","_PN","_PP","_PR","_SU","_VA","_VB","_VG"};
/* Proportion of the tags of the PREVIOUS neighbour, for each tag. */
/* The row is this token's tag, the column is the previous token's tag. */
/* vizinho_anterior[a][b] is the probability that b is followed by a. */
/* trata_tqVz copies row [t->prox->etq_def] into t->pos. */
/* NOTE(review): the -1.00 entries presumably mark pairs that must never be chosen - confirm. */
float vizinho_anterior[21][21]={
	-1.00,-1.00,0.00,0.02,0.02,0.02,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.01,0.11,0.00,0.59,0.01,0.03,0.10,0.06,  /* 0 */
	-1.00,-1.00,0.01,0.05,0.03,0.01,0.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.05,0.01,0.33,0.03,0.15,0.25,0.08, /* 1 */
	0.15,0.03,0.02,0.06,0.03,0.00,0.00,0.01,0.00,0.00,0.01,0.01,0.00,0.00,0.01,0.00,0.03,0.50,0.07,0.02,0.03, /* 2 */
	0.01,0.00,0.03,0.05,0.05,0.02,0.00,0.00,0.00,0.03,0.00,0.01,0.01,0.02,0.13,0.02,0.04,0.19,0.06,0.13,0.19, /* 3 */
	0.00,0.00,0.13,0.01,0.00,0.00,0.00,0.03,0.00,0.03,0.00,0.00,0.00,0.00,0.05,0.01,0.00,0.55,0.00,0.04,0.14, /* 4 */
	0.00,0.00,0.07,0.07,0.03,0.00,0.00,0.00,0.00,0.01,0.00,0.00,0.01,0.00,0.06,0.01,0.08,0.07,0.14,0.35,0.08, /* 5 */
	0.01,0.00,0.02,0.00,0.04,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.02,0.01,0.33,0.00,0.01,0.12,0.01,0.04,0.37, /* 6 */
	0.05,0.00,0.01,0.05,0.04,0.00,0.00,0.04,0.00,0.01,0.00,0.00,0.01,0.00,0.05,0.00,0.27,0.21,0.02,0.06,0.18, /* 7 */
	0.48,0.00,0.00,0.00,0.02,0.00,0.00,0.00,0.00,0.00,0.02,0.00,0.00,0.00,0.05,0.00,0.15,0.11,0.00,0.02,0.13, /* 8 */
	0.03,0.00,0.03,0.08,0.02,0.00,0.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.03,0.01,0.01,0.28,0.31,0.06,0.09, /* 9 */
	0.21,0.00,0.00,0.03,0.04,0.02,0.00,0.00,0.00,0.01,0.00,0.00,0.01,0.00,0.06,0.00,0.33,0.02,0.02,0.20,0.04, /* 10 */
	0.00,0.00,0.01,0.05,0.03,0.04,0.00,0.00,0.00,0.01,0.00,0.00,0.01,0.01,0.13,0.00,0.40,0.03,0.07,0.10,0.10, /* 11 */
	0.12,0.01,0.01,0.04,0.04,0.02,0.00,0.00,0.00,0.01,0.00,0.01,0.01,0.01,0.11,0.00,0.27,0.03,0.05,0.18,0.08, /* 12 */
	0.06,0.00,0.06,0.00,0.01,0.00,0.00,0.00,0.00,0.01,0.00,0.08,0.01,0.00,0.02,0.00,0.09,0.36,0.02,0.01,0.26, /* 13 */
	0.00,0.00,0.13,0.03,0.00,0.00,0.00,0.05,0.00,0.03,0.00,0.00,0.01,0.00,0.05,0.01,0.00,0.59,0.01,0.05,0.04, /* 14 */
	0.00,0.00,0.01,0.11,0.03,0.06,0.00,0.00,0.00,0.00,0.00,0.00,0.01,0.08,0.11,0.02,0.14,0.09,0.03,0.19,0.09, /* 15 */
	0.00,0.00,0.08,0.03,0.02,0.00,0.00,0.02,0.00,0.07,0.00,0.00,0.01,0.00,0.05,0.01,0.00,0.50,0.02,0.11,0.08, /* 16 */
	0.36,0.04,0.11,0.01,0.03,0.00,0.00,0.04,0.00,0.00,0.02,0.01,0.02,0.00,0.05,-1.00,0.16,0.04,0.04,0.04,0.06, /* 17 */
	-1.00,-1.00,0.04,0.12,0.02,0.03,0.00,0.01,0.00,0.01,0.00,0.01,0.01,0.05,0.08,0.04,0.03,0.32,0.03,0.04,0.14, /* 18 */
	-1.00,-1.00,0.03,0.10,0.04,0.02,0.00,0.00,0.00,0.01,0.00,0.00,0.01,0.08,0.06,0.10,0.10,0.23,0.04,0.06,0.12, /* 19 */
	0.00,0.00,0.11,0.05,0.01,0.00,0.00,0.07,0.00,0.02,0.00,0.00,0.01,0.00,0.01,0.01,0.00,0.62,0.01,0.03,0.03 /* 20 */
};
/* Column legend for the tables (tag index / tag label): */
/*   0      1      2     3     4     5      6      7      8     9     10   11   12    13    14    15    16    17    18   19   20     */
/*   AD   AI    AJ   AV    CC  CS    IN    NC   NO   AP    PS   PD   PI     PL    PN    PP    PR   SU    VA   VB  VG    */
/* Proportion of the tags of the NEXT neighbour, for each tag. */
/* The row is this token's tag, the column is the next token's tag. */
/* vizinho_posterior[a][b] is the probability that b is preceded by a. */
/* trata_tqVz copies row [t->prev->etq_def] into t->ant. */
/* NOTE(review): the -1.00 entries presumably mark pairs that must never be chosen - confirm. */
float vizinho_posterior[21][21]={
	-1.00,-1.00,0.07,0.00,0.00,0.00,0.00,0.01,0.00,0.00,0.01,0.00,0.01,0.01,0.00,0.00,0.00,0.87,-1.00,-1.00,0.00, /* 0 */
	-1.00,-1.00,0.11,0.00,0.00,0.00,0.00,0.00,0.00,0.01,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.87,-1.00,-1.00,0.00, /* 1 */
	0.01,0.00,0.02,0.02,0.06,0.01,0.00,0.00,0.00,0.01,0.00,0.00,0.00,0.01,0.15,0.00,0.22,0.23,0.03,0.04,0.17, /* 2 */
	0.07,0.01,0.08,0.05,0.01,0.01,0.00,0.04,0.00,0.04,0.00,0.01,0.01,0.00,0.04,0.04,0.13,0.06,0.10,0.18,0.11, /* 3 */
	0.10,0.01,0.06,0.07,0.00,0.01,0.00,0.04,0.00,0.02,0.01,0.01,0.02,0.01,0.00,0.02,0.10,0.33,0.03,0.13,0.02, /* 4 */
	0.31,0.02,0.01,0.09,0.00,0.00,0.00,0.01,0.00,0.00,0.02,0.03,0.03,0.00,0.00,0.09,0.04,0.06,0.10,0.16,0.03, /* 5 */
	0.02,0.01,0.01,0.02,0.00,0.00,0.00,0.01,0.00,0.00,0.01,0.00,0.00,0.01,0.25,0.00,0.04,0.11,0.03,0.01,0.46, /* 6 */
	0.01,0.00,0.01,0.01,0.03,0.00,0.00,0.04,0.00,0.00,0.00,0.00,0.00,0.00,0.11,0.00,0.11,0.43,0.01,0.01,0.22, /* 7 */
	0.00,0.00,0.00,0.00,0.03,0.00,0.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.01,0.00,0.11,0.70,0.00,0.02,0.11, /* 8 */
	0.03,0.01,0.01,0.06,0.03,0.01,0.00,0.01,0.00,0.01,0.00,0.00,0.00,0.00,0.11,0.00,0.23,0.05,0.01,0.02,0.10, /* 9 */
	0.00,0.00,0.10,0.00,0.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.00,0.00,0.01,0.00,0.00,0.86,0.00,0.00,0.01, /* 10 */
	0.01,0.00,0.05,0.03,0.00,0.00,0.00,0.01,0.00,0.01,0.00,0.00,0.01,0.13,0.02,0.01,0.09,0.49,0.05,0.04,0.05, /* 11 */
	0.02,0.00,0.01,0.03,0.01,0.01,0.00,0.02,0.00,0.01,0.00,0.01,0.01,0.01,0.05,0.01,0.08,0.57,0.04,0.06,0.05, /* 12 */
	0.08,0.00,0.00,0.09,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.01,0.00,0.00,0.10,0.02,0.04,0.13,0.49,0.02, /* 13 */
	0.21,0.01,0.01,0.09,0.02,0.01,0.00,0.02,0.00,0.01,0.01,0.01,0.02,0.00,0.05,0.03,0.13,0.24,0.04,0.07,0.02, /* 14 */
	0.02,0.01,0.02,0.06,0.01,0.01,0.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.04,0.02,0.07,-1.00,0.10,0.53,0.06, /* 15 */
	0.47,0.03,0.01,0.01,0.00,0.00,0.00,0.05,0.00,0.00,0.01,0.02,0.02,0.01,0.00,0.01,0.00,0.30,0.01,0.05,0.00, /* 16 */
	0.01,0.00,0.16,0.03,0.05,0.00,0.00,0.02,0.00,0.02,0.00,0.00,0.00,0.01,0.12,0.00,0.26,0.04,0.07,0.06,0.18, /* 17 */
	0.10,0.06,0.11,0.08,0.00,0.04,0.00,0.02,0.00,0.20,0.00,0.02,0.02,0.01,0.01,0.01,0.09,0.10,0.03,0.09,0.02, /* 18 */
	0.17,0.04,0.02,0.07,0.01,0.04,0.00,0.02,0.00,0.02,0.02,0.01,0.03,0.00,0.04,0.03,0.22,0.14,0.02,0.06,0.04, /* 19 */
	0.09,0.01,0.02,0.09,0.04,0.01,0.00,0.06,0.00,0.02,0.00,0.01,0.01,0.04,0.03,0.02,0.15,0.21,0.05,0.10,0.03 /* 20 */
};

//#include "nomi.c"
//#include "sin.c"

int main (int argc,char *argv[]) {
	toquen *ant, *aux_loc;
	char nome_id_doc[TAMPAL];
	/*int x, y, w, z, i, u, cont, toquenizaPoximo;*/
	int x, y, iniciagravacao=0;
	carac_sufixos *auxsfx;
	char local_docvoc[TAMARQ], nome_docvoc[TAMARQ];

	if (argc<3) { printf("\nExecute este programa digitando 'forma local_dos_arquivos_de_dados' 'local_do_docvoc' < 'texto''\n"); exit(1); }
	strcpy(local_do_arquivo,argv[1]);
	strcpy(local_docvoc,argv[2]);
	strcpy(nome_docvoc,local_docvoc); strcat(nome_docvoc,"docvoc");
	fdocvoc=fopen(nome_docvoc,"ab");

	le_lexico_acentos ( );
	le_locucoes ( );
	le_sufixos ( );
	//le_irreg_nom ( );
	//le_aj_nom ( );
	//le_vb_nom ( );
	//le_sin_nom ( );
	quantoshifens=0;
	/* cria lista circular para 8 toquens */
	/* ou seja, a lista circular tem 3 toquens antes do atual e 4 apos */
	/* toquens antes:   tqPronto   tqVz2   tqVz1 */
	/* toquen atual: tqAtual */
	/* toquens apos: vaga_para_locucao   vaga_para_locucao   tqLeitura   vaga_para_combinatracao */
	tqPronto=(toquen *)malloc(sizeof(toquen));
	tqPronto->saida[0]='\0';
	tqPronto->grava=NAOGRAVA;
	strcpy(tqPronto->palavra,".");
	strcpy(tqPronto->epalavrae," . ");
	strcpy(tqPronto->lema,".");
	tqPronto->etq_def=PONTUACAODEFAULT;
	for (x=0;x<21;x++) { tqPronto->ant[x]=0.00; tqPronto->etq_sfx[x]=0.00; tqPronto->pos[x]=0.00; tqPronto->lema_sfx[x][0]='\0'; }
	tqVz1=tqPronto;
	for (y=0;y<7;y++) {
		tqVz1->prox=(toquen *)malloc(sizeof(toquen)); tqVz1->prox->prev=tqVz1;
		tqVz1->saida[0]='\0';
		tqVz1->grava=NAOGRAVA;
		strcpy(tqVz1->palavra,".");
		strcpy(tqVz1->epalavrae," . ");
		strcpy(tqVz1->lema,".");
		tqVz1->etq_def=PONTUACAODEFAULT;
		for (x=0;x<21;x++) { tqVz1->ant[x]=0.00; tqVz1->etq_sfx[x]=0.00; tqVz1->pos[x]=0.00; tqVz1->lema_sfx[x][0]='\0'; }
		tqVz1=tqVz1->prox;
	}
	tqVz1->prox=tqPronto; tqPronto->prev=tqVz1;
	/* fim da criacao da lista circular */
	/* estrutura os ponteiros */
	tqVz2=tqPronto->prox;
	tqVz1=tqVz2->prox;
	tqAtual=tqVz1->prox;
	/* tqAtual para primeiros tratamentos */
	tqLeitura=tqAtual->prox->prox->prox; /* toquen para leitura a partir do texto */
	/* ha dois toquens de folga entre tqAtual e tqLeitura para locucoes */
	/* ha um toquen de folga apos tqLeitura para combinatracoes */
	houveComb=0;
	numero_de_palavras=0;
	carLidoAnterior=carLidoAtual=carLidoDemais='\0';

	toquenizaTag(tqLeitura);
	toqueniza(tqLeitura);
	trata_Comb();

	while (strcmp(tqPronto->palavra,"/ARQ")) {
		/*printf("(%s)(%s)(%s)(%s)(%s)(%s)(%s)(%s)\n",tqPronto->palavra,tqVz2->palavra,tqVz1->palavra,tqAtual->palavra,tqAtual->prox->palavra,tqLeitura->prev->palavra,tqLeitura->palavra,tqLeitura->prox->palavra);
		printf("(%d)(%d)(%d)(%d)(%d)(%d)(%d)(%d)\n",tqPronto->grava,tqVz2->grava,tqVz1->grava,tqAtual->grava,tqAtual->prox->grava,tqLeitura->prev->grava,tqLeitura->grava,tqLeitura->prox->grava);*/
		if (tqAtual->etq_def>=ETIQUETAINDEFINIDA) { /* exclui PONTUACAODEFAULT e NAOGRAVAVEL */
			if (tqAtual->etq_def==ETIQUETAINDEFINIDA) trata_pre_tqAtual(); /* tratamento inicial */
			trata_loc_tqAtual(); /* tratamento de locucoes */
			if (tqAtual->etq_def==ETIQUETAINDEFINIDA) trata_acent_tqAtual(); /* tratamento de acentos */
			if (tqAtual->etq_def==ETIQUETAINDEFINIDA) trata_sfx_tqAtual (); /* tratamento de sufixos */
		}
		if (tqVz1->etq_def==ETIQUETAINDEFINIDA) trata_tqVz(tqVz1); /* primeira rodada de vizinhanca */
		if (tqVz2->etq_def!=PONTUACAODEFAULT && tqVz2->etq_def!=NAOGRAVAVEL) if (tqVz2->saida[0]=='\0') trata_tqVz(tqVz2); /* segunda rodada de vizinhanca */
		/* atualiza os ponteiros */
		tqPronto=tqVz2; tqVz2=tqVz1; tqVz1=tqAtual;
		tqAtual=tqAtual->prox; tqLeitura=tqLeitura->prox;
		if (strcmp(tqPronto->palavra,"/ARQ")) {
			/* realiza ultimos preparativos e mostra toquen pronto */
			if (tqPronto->etq_def!=PONTUACAODEFAULT && tqPronto->etq_def!=NAOGRAVAVEL) trata_tqPronto();
			if (tqPronto->grava==LIGAGRAVA) {
				sprintf(nome_id_doc,"<doc %ld>\n",id_doc);
				//fprintf(stdout,"%s",nome_id_doc);
			}
			if (tqPronto->grava!=NAOGRAVA)  {
				fprintf(stdout,"%s",tqPronto->saida);
			}
			if (houveComb) { houveComb=0; }
			else { toqueniza(tqLeitura); trata_Comb(); } /* faz leitura de novo toquen */
		}
	}
	fclose(fdocvoc);
} /* main */

void trata_pre_tqAtual() {
	int x, cont;
	if (strstr(" as o os ",tqAtual->epalavrae))
		{ strcpy(tqAtual->lema,"o"); tqAtual->etq_def=0; tqAtual->etq_sfx[0]=1.00; sprintf(tqAtual->saida,"%s o _AD\n",tqAtual->palavra); }
	else if (strstr(" um uma uns umas ",tqAtual->epalavrae))
		{ strcpy(tqAtual->lema,"um"); tqAtual->etq_def=1; tqAtual->etq_sfx[1]=1.00; sprintf(tqAtual->saida,"%s um _AI\n",tqAtual->palavra); }
	else if (strstr(" e ou ",tqAtual->epalavrae))
		{ strcpy(tqAtual->lema,(tqAtual->epalavrae)+1); tqAtual->lema[strlen(tqAtual->lema)-1]='\0'; tqAtual->etq_def=4; tqAtual->etq_sfx[4]=1.00; sprintf(tqAtual->saida,"%s %s _CC\n",tqAtual->palavra,tqAtual->lema); }
	else if (strstr(" em de com sem até contra durante desde após sob dentre ante apud exceto in per perante ",tqAtual->epalavrae))
		{ strcpy(tqAtual->lema,(tqAtual->epalavrae)+1); tqAtual->lema[strlen(tqAtual->lema)-1]='\0';
		minusculasemacento(tqAtual->lema); tqAtual->etq_def=16; tqAtual->etq_sfx[16]=1.00;
		sprintf(tqAtual->saida,"%s %s _PR\n",tqAtual->palavra,tqAtual->lema); }
	if (strlen(tqAtual->palavra)==1) {
		if (strrchr(".!?¿:[]{}=*§#<>",tqAtual->palavra[0])) {
			strcpy(tqAtual->lema,tqAtual->palavra); tqAtual->etq_def=14; tqAtual->etq_sfx[14]=1.00;
			sprintf(tqAtual->saida,"%s %s _PN\n",tqAtual->palavra,tqAtual->lema);
		}
		else if (strrchr(",;()",tqAtual->palavra[0])) {
			strcpy(tqAtual->lema,tqAtual->palavra); tqAtual->etq_def=20; tqAtual->etq_sfx[20]=1.00;
			sprintf(tqAtual->saida,"%s %s _VG\n",tqAtual->palavra,tqAtual->lema);
		}
		else if (strrchr("$¢£%Åå",tqAtual->palavra[0])) {
			strcpy(tqAtual->lema,tqAtual->palavra);  tqAtual->etq_def=17; tqAtual->etq_sfx[17]=1.00;
			sprintf(tqAtual->saida,"%s %s _SU\n",tqAtual->palavra,tqAtual->lema);
		}
		else if (strrchr("&+",tqAtual->palavra[0])) {
			strcpy(tqAtual->lema,tqAtual->palavra);  tqAtual->etq_def=4; tqAtual->etq_sfx[4]=1.00;
			sprintf(tqAtual->saida,"%s %s _CC\n",tqAtual->palavra,tqAtual->lema);
		}
		else if (strrchr("/",tqAtual->palavra[0])) {
			strcpy(tqAtual->lema,tqAtual->palavra);  tqAtual->etq_def=16; tqAtual->etq_sfx[16]=1.00;
			sprintf(tqAtual->saida,"%s %s _PR\n",tqAtual->palavra,tqAtual->lema);
		}
		else if (strrchr("0123456789",tqAtual->palavra[0])) {
			strcpy(tqAtual->lema,tqAtual->palavra);  tqAtual->etq_def=7; tqAtual->etq_sfx[7]=1.00;
			sprintf(tqAtual->saida,"%s %s _NC\n",tqAtual->palavra,tqAtual->lema);
		}
	}
	else {
		cont=0; for(x=0;x<strlen(tqAtual->palavra);x++) if (strrchr(",.0123456789",tqAtual->palavra[x])) cont++;
		if (cont>0) {
			if (cont==strlen(tqAtual->palavra)) {
				strcpy(tqAtual->lema,tqAtual->palavra); tqAtual->etq_def=7; tqAtual->etq_sfx[7]=1.00;
				sprintf(tqAtual->saida,"%s %s _NC\n",tqAtual->palavra,tqAtual->palavra);
			}
			else {
				strcpy(tqAtual->lema,(tqAtual->epalavrae)+1); tqAtual->lema[strlen(tqAtual->lema)-1]='\0';
				minusculasemacento(tqAtual->lema); tqAtual->etq_def=17; tqAtual->etq_sfx[17]=1.00;
				sprintf(tqAtual->saida,"%s %s _SU\n",tqAtual->palavra,tqAtual->lema);
			}
		}
	}
} /* trata_pre_tqAtual */

/*
 * Locution (multi-word expression) handling for tqAtual.
 *
 * Builds the key "w0=w1=w2=w3" from tqAtual and the three tokens after it and
 * searches it in the locution tree. On a hit, the '=' characters found up to
 * index maior_coluna tell how many following tokens belong to the locution
 * (1, 2, or - for any other count - 3). Those tokens are merged into tqAtual,
 * unlinked from their place in the ring and re-linked after the slot that
 * follows tqLeitura; the same number of new tokens is then read so that the
 * window is full again.
 */
void trata_loc_tqAtual () {
	toquen *cursor, *primeiro_desloc, *ultimo_desloc;
	char chave[TAMLINHA];
	int k, etq, n_desloc;

	/* lookup key: current word plus the next three, joined by '=' */
	strcpy(chave,tqAtual->palavra);
	cursor=tqAtual->prox;
	for (k=0;k<3;k++) {
		strcat(chave,"=");
		strcat(chave,cursor->palavra);
		cursor=cursor->prox;
	}
	minusculasemacento(chave);
	etq=pesq_locucoes (chave, 0, raiz_locucoes);
	if (etq==ETIQUETAINDEFINIDA) return;

	/* number of absorbed tokens = number of '=' in the matched part of the key */
	n_desloc=0;
	for (k=0;k<=maior_coluna;k++) {
		if (chave[k]=='=') n_desloc++;
	}
	if (n_desloc!=1 && n_desloc!=2) n_desloc=3;

	/* appends the absorbed words to the current token and finds the last absorbed node */
	primeiro_desloc=tqAtual->prox;
	ultimo_desloc=primeiro_desloc;
	cursor=primeiro_desloc;
	for (k=0;k<n_desloc;k++) {
		strcat(tqAtual->palavra,"=");
		strcat(tqAtual->palavra,cursor->palavra);
		ultimo_desloc=cursor;
		cursor=cursor->prox;
	}

	strcpy(tqAtual->epalavrae," ");
	strcat(tqAtual->epalavrae,tqAtual->palavra);
	strcat(tqAtual->epalavrae," ");
	strcpy(tqAtual->lema,tqAtual->palavra);
	minusculasemacento(tqAtual->lema);
	tqAtual->etq_def=etq;
	tqAtual->etq_sfx[etq]=1.00;
	sprintf(tqAtual->saida,"%s %s %s\n",tqAtual->palavra,tqAtual->lema,etq_2letras[etq]);

	/* takes the absorbed nodes out of the chain ... */
	tqAtual->prox=ultimo_desloc->prox;
	ultimo_desloc->prox->prev=tqAtual;
	/* ... and puts them back between the slot after tqLeitura and tqPronto */
	tqLeitura->prox->prox=primeiro_desloc;
	primeiro_desloc->prev=tqLeitura->prox;
	ultimo_desloc->prox=tqPronto;
	tqPronto->prev=ultimo_desloc;

	/* with 3 absorbed tokens tqLeitura itself was moved, so reading restarts three nodes back */
	if (n_desloc==3) tqLeitura=tqLeitura->prev->prev->prev;
	else tqLeitura=tqLeitura->prox;
	for (k=0;k<n_desloc;k++) {
		if (k>0) tqLeitura=tqLeitura->prox;
		if (houveComb) { houveComb=0; } /* slot already filled by a combination */
		else { toqueniza(tqLeitura); trata_Comb(); } /* reads a new token */
	}
} /* trata_loc_tqAtual */

/*
 * Accent-lexicon lookup for tqAtual: searches the lower-cased word in the
 * tree rooted at raiz_lexico_acentos, letting the search fill the token's
 * lema and etq_def. When a tag comes back, the output line is written.
 */
void trata_acent_tqAtual() {
	char minusc[TAMPAL];

	strcpy(minusc,tqAtual->palavra);
	minuscula(minusc);
	pesq_lexico_acentos (minusc,0,tqAtual->lema,&(tqAtual->etq_def),raiz_lexico_acentos);
	if (tqAtual->etq_def==ETIQUETAINDEFINIDA) return; /* not in the lexicon */
	chama(tqAtual);
} /* trata_acent_tqAtual */

/*
 * Suffix analysis for tqAtual.
 *
 * The word is reversed and lower-cased without accents, then searched in the
 * suffix tree. The search reports through the globals maior_sufixo,
 * lema_do_sufixo and etq_do_sufixo. Per tag, the suffix lemma is copied into
 * the token and etq_sfx is raised to the suffix score when that is larger.
 * A tag becomes definitive here only when its score is above 0.98 (among
 * several, the highest one wins). If the token ends up with no lemma, the
 * lower-cased accent-free word is used.
 */
void trata_sfx_tqAtual () {
	char invertida[TAMPAL];
	size_t tam, k;
	int etq;
	float limiar=0.98;

	/* clean result buffers for the search */
	for (etq=0;etq<21;etq++) { lema_do_sufixo[etq][0]='\0'; etq_do_sufixo[etq]=0.0; }
	maior_sufixo=NULL;

	/* suffixes are indexed from the last letter backwards */
	tam=strlen(tqAtual->palavra);
	for (k=0;k<tam;k++) invertida[k]=tqAtual->palavra[tam-1-k];
	invertida[tam]='\0';
	minusculasemacento(invertida);

	pesq_sufixos(invertida, 0, raiz_sufixos);
	if (maior_sufixo!=NULL) { trata_maior_sufixo(tqAtual->palavra); }

	for (etq=0;etq<21;etq++) {
		/* merges the search result for this tag into the token */
		if (lema_do_sufixo[etq][0]!='\0') strcpy(tqAtual->lema_sfx[etq],lema_do_sufixo[etq]);
		if (tqAtual->etq_sfx[etq]<etq_do_sufixo[etq]) tqAtual->etq_sfx[etq]=etq_do_sufixo[etq];
		/* keeps the tag with the best score seen so far above the threshold */
		if (tqAtual->etq_sfx[etq]>limiar) {
			tqAtual->etq_def=etq;
			if (tqAtual->lema_sfx[etq][0]!='\0') strcpy(tqAtual->lema,tqAtual->lema_sfx[etq]);
			limiar=tqAtual->etq_sfx[etq];
		}
	}

	if (tqAtual->lema[0]=='\0') {
		strcpy(tqAtual->lema,tqAtual->palavra);
		minusculasemacento(tqAtual->lema);
	}
} /* trata_sfx_tqAtual */

/*
 * Chooses the tag of token t from its neighbours and its suffix.
 *
 * When the previous / next token carries a real tag (0..20), the matching row
 * of vizinho_posterior / vizinho_anterior is copied into t->ant / t->pos.
 * Neighbours whose etq_def is a negative sentinel (ETIQUETAINDEFINIDA,
 * PONTUACAODEFAULT, NAOGRAVAVEL) are skipped and t->ant / t->pos keep their
 * current values: indexing the tables with -2 or -3, as the code used to do,
 * reads outside the arrays.
 * NOTE(review): PONTUACAODEFAULT neighbours could arguably use the row of tag
 * 14 (_PN) instead of being skipped - confirm the intended behaviour.
 *
 * The score of a tag x is (ant[x] + 3*etq_sfx[x] + pos[x]) / 5. The starting
 * choice is 17 (noun). Tags 0 (_AD), 1 (_AI) and 16 (_PR) are not accepted on
 * probability alone: 0 only for the word "a", 16 only for a short list of
 * prepositions, 1 never.
 *
 * t: token to decide; t->etq_def is always overwritten, t->lema is updated
 *    when the chosen tag has lemma information.
 */
void trata_tqVz(toquen *t) {
	float maiorprob, estaprob;
	int x, i, viz;

	viz=t->prev->etq_def;
	if (viz>=0 && viz<21) for (x=0;x<21;x++) t->ant[x]=vizinho_posterior[viz][x];
	viz=t->prox->etq_def;
	if (viz>=0 && viz<21) for (x=0;x<21;x++) t->pos[x]=vizinho_anterior[viz][x];

	i = 17; /* a noun, unless something scores higher */
	maiorprob = (t->ant[i] + 3*t->etq_sfx[i] + t->pos[i])/5; /* score of the noun tag */
	for (x=0;x<21;x++) {
		estaprob = (t->ant[x] + 3*t->etq_sfx[x] + t->pos[x])/5; /* score of tag x */
		if (estaprob>maiorprob) {
			if (x!=0 && x!=1 && x!=16) { /* any tag but article / preposition */
				i=x; maiorprob=estaprob;
			}
			else if (x==0) { /* definite article: only for the word "a" */
				if (!strcmp(t->palavra,"a")) {
					i=x; maiorprob=estaprob;
				}
			}
			else if (x==16){ /* preposition: only for these words */
				if (strstr(" a entre para por pos sobre ",t->epalavrae)) {
					i=x; maiorprob=estaprob;
				}
			}
		}
	}
	t->etq_def=i;
	if ((i>=3 && i<=8) || i==13 || i==16) { /* invariable words: the lemma is the word itself */
		strcpy(t->lema,(t->epalavrae)+1); t->lema[strlen(t->lema)-1]='\0'; minusculasemacento(t->lema);
	}
	else if (t->lema_sfx[i][0]!='\0') strcpy(t->lema,t->lema_sfx[i]);
} /* trata_tqVz */

/*
 * Post-processes the token just read into tqLeitura.
 *
 * Always rebuilds tqLeitura->epalavrae (lower-cased word between two spaces)
 * and the maius flag. Then, if the word is a contraction of a preposition
 * with an article / pronoun / adverb ("ao", "da", "nas", "num", "noutra",
 * "deste", "daqui", ...) or contains a hyphen, combinatracao() is called with
 * the two parts (word, space-wrapped form, lemma and tag for each).
 *
 * Hyphenated words: the piece after the first hyphen (up to a second hyphen,
 * if any) is tested against a list of clitic pronouns. With a pronoun the
 * word is treated as verb + pronoun; otherwise as a compound whose parts are
 * both pushed towards the noun tag (17).
 *
 * Fix: the "noutro..." and "doutro..." lists repeated the masculine plural,
 * so "noutras" and "doutras" were never split; they are listed now.
 *
 * Always returns 1.
 */
int trata_Comb() {
	char palavra[TAMPAL], epalavrae[TAMPAL], linha[TAMLINHA], palavra_pos[TAMPAL];
	int x, y, w, z, cont;
	strcpy(tqLeitura->epalavrae," ");
	strcat(tqLeitura->epalavrae,tqLeitura->palavra);
	strcat(tqLeitura->epalavrae," ");
	minuscula(tqLeitura->epalavrae);
	/* the word starts with a capital when lower-casing changed its first character */
	if (tqLeitura->palavra[0]!=tqLeitura->epalavrae[1]) tqLeitura->maius=1; else tqLeitura->maius=0;
	/* for most contractions the second part is the word without its first letter ("do" -> "o") */
	strcpy(palavra_pos,(tqLeitura->palavra)+1);
	if (!strcmp(tqLeitura->palavra,"à")) { combinatracao("a"," a ","a",16,"a"," a ","a",0); }
	else if (strstr(" ao aos ",tqLeitura->epalavrae)) { combinatracao("a"," a ","a",16,palavra_pos," o ","o",0); }
	else if (strstr(" da do das dos ",tqLeitura->epalavrae)) { combinatracao("de"," de ","de",16,palavra_pos," o ","o",0); }
	else if (strstr(" na no nas nos ",tqLeitura->epalavrae)) {  combinatracao("em"," em ","em",16,palavra_pos," o ","o",0); }
	else if (strstr(" num nuns ",tqLeitura->epalavrae)) {  combinatracao("em"," em ","em",16,palavra_pos," um ","um",1); }
	else if (strstr(" noutro noutra noutros noutras ",tqLeitura->epalavrae)) {  combinatracao("em"," em ","em",16,palavra_pos," outro ","outro",12); }
	else if (strstr(" doutro doutra doutros doutras ",tqLeitura->epalavrae)) {  combinatracao("de"," de ","de",16,palavra_pos," outro ","outro",12); }
	else if (strstr(" naquela naquele naquelas naqueles ",tqLeitura->epalavrae)) { combinatracao("em"," em ","em",16,palavra_pos," aquele ","aquele",11); }
	else if (strstr(" neste nesta nestes nestas ",tqLeitura->epalavrae)) { combinatracao("em"," em ","em",16,palavra_pos," este ","este",11); }
	else if (strstr(" deste desta destes destas ",tqLeitura->epalavrae)) { combinatracao("de"," de ","de",16,palavra_pos," este ","este",11); }
	else if (strstr(" nesse nessa nesses nessas ",tqLeitura->epalavrae)) { combinatracao("em"," em ","em",16,palavra_pos," esse ","esse",11); }
	else if (strstr(" desse dessa desses dessas ",tqLeitura->epalavrae)) { combinatracao("de"," de ","de",16,palavra_pos," esse ","esse",11); }
	else if (strstr(" daquela daquele daquelas daqueles ",tqLeitura->epalavrae)) { combinatracao("de"," de ","de",16,palavra_pos," aquele ","aquele",11); }
	else if (strstr(" daquilo ",tqLeitura->epalavrae)) { combinatracao("de"," de ","de",16,"aquilo"," aquilo ","aquilo",11); }
	else if (strstr(" àquilo ",tqLeitura->epalavrae)) { combinatracao("a"," a ","a",16,"aquilo"," aquilo ","aquilo",11); }
	else if (tqLeitura->palavra[0]=='à' && tqLeitura->palavra[1]=='q') { combinatracao("a"," a ","a",16,"aquele"," aquele ","aquele",11); }
	else if (strstr(" daqui dali dacolá ",tqLeitura->epalavrae)) {
		/* "de" + adverb: the adverb is the word without its first letter */
		char lema[TAMPAL];
		strcpy(palavra,(tqLeitura->palavra)+1); strcpy(lema,palavra); minusculasemacento(lema);
		strcpy(epalavrae," "); strcat(epalavrae,palavra); strcat(epalavrae," "); minuscula(epalavrae);
		{ combinatracao("de"," de ","de",16,palavra,epalavrae,lema,3); }
	}
	else if (strrchr(tqLeitura->palavra,'-')) { /* probably an oblique personal pronoun attached to a verb */
		/* palavra = lower-cased word plus trailing space; linha = the same without the space */
		strcpy(palavra,tqLeitura->epalavrae+1);
		strcpy(linha,palavra);
		linha[strlen(linha)-1]='\0';
		/* y = index of the first hyphen, w = index of the second one (or end of word) */
		cont=0; w=-1; z=1; y=1;
		for (x=0;x<strlen(linha);x++) {
			if (linha[x]=='-') {
				cont++;
				if (cont==1) y=x; /* confidenciar-lhe */
				if (cont==2) w=x; /* confidenciar-lhe-ia */
			}
		}
		if (w==-1) w=x;
		/* epalavrae = " " + piece between the two hyphens + " " */
		epalavrae[0]=' ';
		for (z=1;z<w-y;z++) epalavrae[z]=linha[y+z];
		epalavrae[z++]=' '; epalavrae[z]='\0';
		if (strstr(" me te se no na nos vos o a lo la los las lha lhe os as lhes ",epalavrae)) {
			/* cuts the first part at the first hyphen */
			palavra[y]='\0';
			tqLeitura->epalavrae[y+1]='\0';
			linha[strlen(linha)]='\0';
			if (strstr(" me te se nos vos lha lhe lhes ",epalavrae)) {
				strcat(palavra,linha+w+1); /* confidenciar-lhe-ia --> confidenciaria */
				strcat(tqLeitura->epalavrae,linha+w+1);
			}
			else {
				if (w!=-1 && w!=x) {
					/* second hyphen present: the rest stays glued to the second part */
					epalavrae[strlen(epalavrae)-1]='\0';
					strcat(epalavrae,"-");
					strcat(epalavrae,linha+w+1); /*meseu-na-cidade*/
					strcat(epalavrae," ");
				}
			}
			strcat(tqLeitura->epalavrae," ");
			epalavrae[strlen(epalavrae)-1]='\0';
			strcpy(palavra_pos,epalavrae+1);
			if (w!=-1 && w!=x) {
				combinatracao(palavra,tqLeitura->epalavrae,"",ETIQUETAINDEFINIDA,palavra_pos,epalavrae,"",ETIQUETAINDEFINIDA);
			}
			else {
				if (epalavrae[1]=='o' || epalavrae[1]=='a') combinatracao(palavra,tqLeitura->epalavrae,"",ETIQUETAINDEFINIDA,palavra_pos," o ","o",15);
				else combinatracao(palavra,tqLeitura->epalavrae,"",ETIQUETAINDEFINIDA,palavra_pos," me ","me",15);
				if (tqLeitura->palavra[strlen(tqLeitura->palavra)-1]=='s') {
					if (tqLeitura->prev->palavra[strlen(tqLeitura->prev->palavra)-1]=='a' || tqLeitura->prev->palavra[strlen(tqLeitura->prev->palavra)-1]=='e' || tqLeitura->prev->palavra[strlen(tqLeitura->prev->palavra)-1]=='o')
						{ tqLeitura->prev->palavra[strlen(palavra)+1]='\0'; tqLeitura->prev->palavra[strlen(palavra)]='s'; tqLeitura->etq_sfx[6]=-9999; /* marker: the added 's' is to be removed later */}
				}
				/* NOTE(review): this loop also overwrites the -9999 marker stored in etq_sfx[6] just above, */
				/* so the test for it in trata_tqPronto cannot succeed through this path - confirm intent */
				for (x=0;x<21;x++) tqLeitura->etq_sfx[x]=-1.00;
				tqLeitura->etq_sfx[18]=0.50;
				tqLeitura->etq_sfx[19]=0.90;
			}
		}
		else {
			/* no pronoun: hyphenated compound, split at the first hyphen */
			palavra[y]='\0';
			tqLeitura->epalavrae[y+1]='\0';
			strcat(tqLeitura->epalavrae," ");
			epalavrae[strlen(epalavrae)-1]='\0';
			linha[strlen(linha)]='\0';
			if (w!=-1 && w!=x) {
				strcat(epalavrae,"-");
				strcat(epalavrae,linha+w+1); /*meseu-da-cidade*/
			}
			strcpy(palavra_pos,epalavrae+1);
			strcat(epalavrae," ");
			combinatracao(palavra,tqLeitura->epalavrae,"",ETIQUETAINDEFINIDA,palavra_pos,epalavrae,"",ETIQUETAINDEFINIDA);
			/* both parts are pushed towards the noun tag */
			tqLeitura->etq_sfx[17]=1.00;
			tqLeitura->prox->etq_sfx[17]=1.00;
		}
	}
	return(1);
} /* trata_Comb */

void trata_tqPronto () {
	/*char epalavrae[TAMPAL];*/
	int i, u;
	/* inicia tratamento dos ultimos especiais */
	if (tqPronto->prox->etq_sfx[6]==-9999) tqPronto->palavra[strlen(tqPronto->palavra)-1]='\0'; /* tira o s colocado por causa do pronome obliquo */
	/*strcpy(epalavrae," "); strcat(epalavrae,tqPronto->palavra); strcat(epalavrae," ");
	minuscula(epalavrae);*/
	i = tqPronto->prev->etq_def; /* i identifica a etiqueta da palavra anterior */
	u = tqPronto->prox->etq_def; /* u identifica a etiqueta da palavra posterior */
	if (strstr(" a ",tqPronto->epalavrae)) {
		if ((i==17 && u==17) || u==18 || u==19  || u==7) sprintf(tqPronto->saida,"%s a _PR\n",tqPronto->palavra);
		else if (i==14) sprintf(tqPronto->saida,"%s o _AD\n",tqPronto->palavra);
	}
	else if (tqPronto->etq_def==19 && !strcmp(tqPronto->lema,"dever")) {
		if (u==18 || u==19) /* VA ou VB */ sprintf(tqPronto->saida,"%s %s _VA\n",tqPronto->palavra,tqPronto->lema);
		else sprintf(tqPronto->saida,"%s %s _VB\n",tqPronto->palavra,tqPronto->lema);
	}
	else if (tqPronto->etq_def==19 && !strcmp(tqPronto->lema,"poder")) {
		if (u==18 || u==19) /* VA ou VB */ sprintf(tqPronto->saida,"%s %s _VA\n",tqPronto->palavra,tqPronto->lema);
		else sprintf(tqPronto->saida,"%s %s _VB\n",tqPronto->palavra,tqPronto->lema);
	}
	else if (strstr(" são ",tqPronto->epalavrae)) {
		if( i==0  || i==1 || i==16 || i==19) /* AD ou AI ou PR antes */ sprintf(tqPronto->saida,"são sao _SU\n");
		else sprintf(tqPronto->saida,"são ser _VB\n");
	}
	else if (strstr(" pela pelas pelo pelos ",tqPronto->epalavrae)) {
		if (strstr(" pelo ",tqPronto->epalavrae)) { /* VB ou SU ou por... */
			if( i==0  || i==1 || i==16) /* AD ou AI ou PR antes */ sprintf(tqPronto->saida,"%s pelo _SU\n", tqPronto->palavra);
			else /* pode ser VB ou por... */ {
				if (u==0  || u==1 || u==16 || i==15) /* AD ou AI ou PR depois ou PP antes */ sprintf(tqPronto->saida,"%s pelar _VB\n", tqPronto->palavra);
				else if (tqPronto->maius) sprintf(tqPronto->saida,"Por por _PR\no o _AD\n");
				else sprintf(tqPronto->saida,"por por _PR\no o _AD\n");
			}
		}
		else if (strstr(" pelos ",tqPronto->epalavrae)) { /* SU ou por... */
			if( i==0  || i==1 || i==16) /* AD ou AI ou PR antes */ sprintf(tqPronto->saida,"%s pelo _SU\n",tqPronto->palavra);
			else if (tqPronto->maius) sprintf(tqPronto->saida,"Por por _PR\nos o _AD\n");
			else sprintf(tqPronto->saida,"por por _PR\nos o _AD\n");
		}
		else if (strstr(" pela pelas ",tqPronto->epalavrae)) { /* VB ou por... */
			if(i!=15 || i==19 || (u!=0 && u!=1))  /* nao PP ou sim VB antes ou nao AI e Ad depois */ {
				if (tqPronto->maius) sprintf(tqPronto->saida,"Por por _PR\n%so _AD\n",(tqPronto->epalavrae)+4);
				else sprintf(tqPronto->saida,"por por _PR\n%so _AD\n",(tqPronto->epalavrae)+4); }
			else sprintf(tqPronto->saida,"%s pelar _VB\n",tqPronto->palavra);
		}
	}
	else if (strstr(" foi ",tqPronto->epalavrae)) {
		if (u==19 || u==16) /* PR ou VB apos */ sprintf(tqPronto->saida,"%s ir _VB\n",tqPronto->palavra);
		else sprintf(tqPronto->saida,"%s ser _VA\n",tqPronto->palavra);
	}
	else if ((i==0 || i==1 || i==11 || i==12) && tqPronto->etq_def==9) {
		/* (anterior=AI, AD, PD ou PI) e tqPronto=AP */
		/* O soldado fulano */ /* evita de 'soldado' ficar AP */
		if (tqPronto->lema_sfx[17][0]!='\0') strcpy(tqPronto->lema,tqPronto->lema_sfx[17]);
		else {
			strcpy(tqPronto->lema,(tqPronto->epalavrae)+1);
			tqPronto->lema[strlen(tqPronto->lema)-1]='\0';
			minusculasemacento(tqPronto->lema);
		}
		sprintf(tqPronto->saida,"%s %s _SU\n",tqPronto->palavra,tqPronto->lema);
	}
	/* finaliza tratamento dos ultimos especiais */
	else  { chama(tqPronto); }
} /* trata_tqPronto */

/*
 * Writes the standard output line of token t, "word lemma _TAG\n", into
 * t->saida. The label comes from etq_2letras[t->etq_def], so t->etq_def must
 * be a real tag (0..20) when this is called.
 * The nominalization / synonym hooks that used to live here are disabled.
 */
void chama (toquen *t) {
	const char *rotulo=etq_2letras[t->etq_def];

	sprintf(t->saida,"%s %s %s\n",t->palavra,t->lema,rotulo);
} /* chama */

int toqueniza (toquen *t) { /* bem aqui */
	int posicao=-1, x, letrahifen;
	char palavra[TAMPAL];
	/* laco para leitura dos caracteres do arquivo de entrada */
	while (!feof(stdin)) {
		carLidoAnterior=carLidoAtual;
		if (carLidoDemais!='\0') { carLidoAtual=carLidoDemais; carLidoDemais='\0'; }
		else if (!fread (&carLidoAtual, sizeof(char), 1, stdin)) break;
		if (carLidoAtual==' ') quantoshifens=0;
		else if (carLidoAtual=='-') quantoshifens++;
		if (quantoshifens>2 && carLidoAtual=='-') carLidoAtual=' ';
		if (carLidoAtual=='.' || carLidoAtual==',') { /* HIPOTESE 1: PONTO ou VIRGULA */
			if (posicao>=0) {
				if (strrchr("0123456789",carLidoAnterior)) {
					fread (&carLidoDemais, sizeof(char), 1, stdin);
					if (strrchr("0123456789",carLidoDemais)) { /* encontrado um numero com virgula decimal ou ponto de milhar */
						palavra[++posicao]=carLidoAtual; /* continua leitura de palavra */
					}
					else { /* carLidoDemais nao e` numero */ /* fecha a palavra e deixa a virgula ou o ponto para depois */
						carLidoDemais=carLidoAtual; break;
					}
				}
				else { /* anterior nao e numero */ /* fecha a palavra e deixa a virgula ou o ponto para depois */
					carLidoDemais=carLidoAtual; break;
				}
			}
			else { /* nao ha palavra se formando */
				palavra[++posicao]=carLidoAtual;
				if (carLidoAtual=='.') {
					int reticencias=0;
					fread (&carLidoDemais, sizeof(char), 1, stdin);
					while (carLidoDemais=='.') { reticencias++; fread (&carLidoDemais, sizeof(char), 1, stdin); }
					if (carLidoDemais==' ' || carLidoDemais=='\t') carLidoDemais='\0';
					if (reticencias>1) { /* fecha palavra com reticencias */
						palavra[++posicao]=carLidoAtual; palavra[++posicao]=carLidoAtual; palavra[posicao]='\0';
						t->etq_def=14; sprintf(t->saida,"... ... _PN\n");
						t->grava=CONTINUAGRAVA; strcpy(t->palavra,palavra); strcpy(t->lema,palavra); t->epalavrae[0]='\0';
						for (x=0;x<21;x++) { t->ant[x]=0.00; t->etq_sfx[x]=0.00; t->pos[x]=0.00; t->lema_sfx[x][0]='\0'; }
						return(1);
					}
				}
				break; /* ou fecha palavra com ponto ou com virgula */
			}
		}
		else { /* carLidoAtual nao e` virgula nem ponto */
			if (carLidoAtual==' ' || carLidoAtual=='\t') { if (posicao>=0) break; } /* HIPOTESE 2: espaco ou tabulacao */
			else if (carLidoAtual=='\n' || carLidoAtual=='\r') { /* HIPOTESE 3: mudanca de linha ou retorno */
				if (t->prev->etq_def!=14) carLidoDemais='.';
				if (posicao>=0) break;
			}
			else if (carLidoAtual=='>') {
				if (posicao>=0) {
					if (palavra[0]=='<') { /* HIPOTESE 4: final de tag no interior do texto*/
						palavra[++posicao]=carLidoAtual; palavra[++posicao]='\0';
						/* encontrou uma tag */
						/*texto
						</TEXT>
						</DOC> */
						t->grava=NAOGRAVA;
						t->saida[0]='\0'; strcpy(t->palavra,palavra); t->lema[0]='\0'; t->etq_def=NAOGRAVAVEL; t->epalavrae[0]='\0';
						for (x=0;x<21;x++) { t->ant[x]=0.00; t->etq_sfx[x]=0.00; t->pos[x]=0.00; t->lema_sfx[x][0]='\0'; }
						return(1);
					} /* fim do tratamento de tags no interior do texto */
					else /* carLidoAtual e? > mas a palavra nao iniciava com < */ carLidoDemais=carLidoAtual; break; /* HIPOTESE 5: maior que fica para depois */
				}
				else { /* carLidoAtual e'  > e nao havia palavra */ palavra[++posicao]=carLidoAtual; break; } /* HIPOTESE 5: maior que */
			}
			else if (strrchr("!?¿:/;()[]{}=$¢£+%&§#*Åå<",carLidoAtual)) {
				if (posicao>=0) {
					if (carLidoAnterior=='<') {
						if (carLidoAtual=='/') { /* HIPOTESE 6: / de </ em tag */
							palavra[++posicao]=carLidoAtual;
							fread (&carLidoAtual, sizeof(char), 1, stdin);
							for (;carLidoAtual!='>';) { palavra[++posicao]=carLidoAtual; fread (&carLidoAtual, sizeof(char), 1, stdin); }
							palavra[++posicao]=carLidoAtual; palavra[++posicao]='\0';
							if (!strcmp(palavra,"</TEXT>")) {
								/* terminou um texto */
								/* grava o identificador do doc e o numero de palavras */
								fwrite(&id_doc, sizeof(long), 1, fdocvoc);
								fwrite(&numero_de_palavras, sizeof(long), 1, fdocvoc);
								numero_de_palavras=0;
								posicao=-1;
								toquenizaTag(t);
								if (!strcmp(t->palavra,"/ARQ")) return(1);
							}
							else { /* tag no interior do texto */
								t->grava=NAOGRAVA;
								t->saida[0]='\0'; strcpy(t->palavra,palavra); t->lema[0]='\0'; t->etq_def=NAOGRAVAVEL; t->epalavrae[0]='\0';
								for (x=0;x<21;x++) { t->ant[x]=0.00; t->etq_sfx[x]=0.00; t->pos[x]=0.00; t->lema_sfx[x][0]='\0'; }
								return(1);
							}
						}
						else { carLidoDemais=carLidoAtual; break; } /* HIPOTESE 5: < mais simbolo que nao e` / */
					}
					else { carLidoDemais=carLidoAtual; break; } /* HIPOTESE 6: simbolo em palavra se formando e nao e` < anterior */
				}
				else {
					palavra[++posicao]=carLidoAtual; /* HIPOTESE 7:  inicia formacao de palavra com < */
					if (carLidoAtual!='<') break; /* HIPOTESE 8:  forma palavra de um caractere com simbolo diferente de < */
				}
			}
			else {
				if (strrchr("-abcdefghijklmnopqrstuvwxyzàáâãçéêíóôõú0123456789",carLidoAtual)) {
					palavra[++posicao]=carLidoAtual; /* HIPOTESE 9: letra ou numero */
				} /* e continua a leitura de novo toquen */
				else if (strrchr("ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÉÊÍÓÔÕÚ",carLidoAtual)) {
					palavra[++posicao]=carLidoAtual; /* HIPOTESE 9: letra ou numero */
				} /* e continua a leitura de novo toquen */
				else if (strrchr("@ÄÇÈËÌÎÏÑÒÖÙÛÜÝäèëìîïñòöùûüý",carLidoAtual)) {
					palavra[++posicao]=carLidoAtual; /* HIPOTESE 9: letra ou numero */
				} /* e continua a leitura de novo toquen */
			}
		}
	}
	if (posicao>=0) {
		if (primeiroToquen) { t->grava=LIGAGRAVA; primeiroToquen=0; }
		else t->grava=CONTINUAGRAVA;
		palavra[++posicao]='\0';
		numero_de_palavras=numero_de_palavras+1;
		t->saida[0]='\0';
		for (letrahifen=0;letrahifen<strlen(palavra);letrahifen++) if (palavra[letrahifen]!='-') break;
		strcpy(t->palavra,palavra+letrahifen);
		t->lema[0]='\0'; t->etq_def=ETIQUETAINDEFINIDA; t->epalavrae[0]='\0';
		for (x=0;x<21;x++) { t->ant[x]=0.00; t->etq_sfx[x]=0.00; t->pos[x]=0.00; t->lema_sfx[x][0]='\0'; }
	}
	else {
		if (feof(stdin)) {
			strcpy(t->palavra,"/ARQ");
			t->etq_def=NAOGRAVAVEL; t->saida[0]='\0'; t->grava=NAOGRAVA;
		}
	}
	return(1);
} /* toqueniza */

/* Skips the tag area of the input (stdin) up to and including "<TEXT>".
   A sliding window over the last 7 characters read is compared against
   "<DOCNO>" (the document name up to the next '<' is then read and passed to
   pegaIdDoc) and, over its last 6 characters, against "<TEXT>" (one more
   character is consumed, areaTextoTag becomes AREATEXTO and primeiroToquen
   is set). If the input ends, t is marked as the "/ARQ" end-of-file token.
   Expected layout:
     <DOC>
     <DOCNO>FSP950109-001</DOCNO>
     <DOCID>FSP950109-001</DOCID>
     <DATE>950109</DATE>
     <CATEGORY>...</CATEGORY>
     <TEXT>
     text
   Always returns 1. */
int toquenizaTag (toquen *t) {
	enum { TAMTAG = 8 };
	int x;
	char tag[TAMTAG], nomeDoDoc[TAMPAL];
	/* start from an all-zero window so no indeterminate byte is ever compared */
	memset(tag,0,sizeof tag);
	while (!feof(stdin)) {
		if (!fread (&carLidoAtual, sizeof(char), 1, stdin)) break;
		/* slide the window one position left and append the new character */
		memmove(tag,tag+1,TAMTAG-2);
		tag[TAMTAG-2]=carLidoAtual;
		if (!strcmp(tag,"<DOCNO>")) {
			/* copy the document name; stop at '<' or end of input, and never
			   write past the buffer (extra characters are dropped) */
			x=0;
			while (fread (&carLidoAtual, sizeof(char), 1, stdin) && carLidoAtual!='<') {
				if (x<TAMPAL-1) nomeDoDoc[x++]=carLidoAtual;
			}
			nomeDoDoc[x]='\0';
			pegaIdDoc(nomeDoDoc);
		}
		else if (!strcmp(tag+1,"<TEXT>")) {
			/* consume the character right after the tag (normally the line break);
			   a failed read only means end of input, handled below */
			(void)fread (&carLidoAtual, sizeof(char), 1, stdin);
			areaTextoTag=AREATEXTO;
			primeiroToquen=1;
			break;
		}
	}
	if (feof(stdin)) {
		strcpy(t->palavra,"/ARQ");
		t->etq_def=NAOGRAVAVEL; t->saida[0]='\0'; t->grava=NAOGRAVA;
	}
	return(1);
} /* toquenizaTag */

/* Loads the lexicon file "acentos.let" (located under local_do_arquivo) into
   the tree rooted at raiz_lexico_acentos.
   Each line is expected to be "word lemma tag"; only the first two characters
   of the tag field are converted to a number.
   Lines with fewer than three fields, or whose word/lemma would not fit in a
   TAMPAL buffer, are skipped. Exits with status 1 if the file cannot be opened. */
void le_lexico_acentos ( ) {
	/* scan buffers are as large as the line buffer, so an unbounded %s cannot overflow them */
	char estalinha[TAMLINHA], palavra_lexico_acentos[TAMLINHA], lema_lexico_acentos[TAMLINHA], s_etq_lexico_acentos[TAMLINHA];
	char arquivo[TAMLINHA];
	int etq_lexico_acentos, tam;
	FILE *lexico_acentos;
	raiz_lexico_acentos=NULL;
	/* build the path with a bounded write; a truncated path is treated as unopenable */
	tam=snprintf(arquivo,sizeof arquivo,"%s%s",local_do_arquivo,"acentos.let");
	if (tam<0 || (size_t)tam>=sizeof arquivo) lexico_acentos=NULL;
	else lexico_acentos=fopen(arquivo,"r");
	if (lexico_acentos==NULL) { printf("\nImpossivel abrir arquivo acentos.let"); exit(1); }
	while (fgets(estalinha,TAMLINHA,lexico_acentos)) {
		/* estalinha (example) = word lemma tag_0_to_20 */
		if (sscanf(estalinha,"%s %s %s",palavra_lexico_acentos,lema_lexico_acentos,s_etq_lexico_acentos)!=3) continue;
		/* lookups copy the lemma into TAMPAL-sized buffers, so longer entries are rejected here */
		if (strlen(palavra_lexico_acentos)>=TAMPAL || strlen(lema_lexico_acentos)>=TAMPAL) continue;
		s_etq_lexico_acentos[2]='\0';
		etq_lexico_acentos=atoi(s_etq_lexico_acentos);
		/* insert into the lexicon tree */
		insere_arv_lexico_acentos (palavra_lexico_acentos, 0, lema_lexico_acentos, etq_lexico_acentos, &raiz_lexico_acentos);
	}
	fclose(lexico_acentos);
} /* le_lexico_acentos */

/* Inserts palavra (from position coluna onwards) into the ternary search tree
   at *raiz, creating nodes as needed. Each node holds one character; menor and
   maior lead to smaller/greater characters at the same position, igual to the
   next position. The node of the last character receives a copy of the lemma
   and the tag, unless it already has one (the first entry wins).
   An empty word, or a coluna outside the word, inserts nothing.
   Exits with status 1 if memory cannot be allocated. */
void insere_arv_lexico_acentos (char *palavra, int coluna, char *lema_lexico_acentos, int etq_lexico_acentos, carac_lexico_acentos **raiz) {
	const size_t tam=strlen(palavra);
	carac_lexico_acentos *no;
	/* guards the "length - 1" computation below against wrapping around */
	if (coluna<0 || (size_t)coluna>=tam) return;
	for (;;) {
		no=*raiz;
		if (no==NULL) {
			no=malloc(sizeof *no);
			if (no==NULL) { fprintf(stderr,"\nMemoria insuficiente"); exit(1); }
			no->carac=palavra[coluna];
			no->lema_etq=NULL;
			no->menor=NULL;
			no->igual=NULL;
			no->maior=NULL;
			*raiz=no;
		}
		if (palavra[coluna] < no->carac) raiz=&no->menor;
		else if (palavra[coluna] > no->carac) raiz=&no->maior;
		else if ((size_t)coluna==tam-1) {
			/* last character: attach lemma and tag if this word has none yet */
			if (no->lema_etq==NULL) {
				carac_lexico_acentos_lema_etq *dados=malloc(sizeof *dados);
				if (dados==NULL) { fprintf(stderr,"\nMemoria insuficiente"); exit(1); }
				dados->lema=malloc(strlen(lema_lexico_acentos)+1);
				if (dados->lema==NULL) { fprintf(stderr,"\nMemoria insuficiente"); exit(1); }
				strcpy(dados->lema,lema_lexico_acentos);
				dados->etq_def=etq_lexico_acentos;
				no->lema_etq=dados;
			}
			return;
		}
		else {
			coluna++;
			raiz=&no->igual;
		}
	}
} /* insere_arv_lexico_acentos */

/* Looks palavra up (from position coluna) in the ternary search tree raiz.
   When the node of the last character carries lemma/tag data, the lemma is
   copied into lema and the tag stored in *etq_def; otherwise both outputs are
   left untouched. */
void pesq_lexico_acentos(char *palavra, int coluna, char *lema, int *etq_def, carac_lexico_acentos *raiz) {
	const size_t ultima=strlen(palavra)-1; /* index of the last character */
	carac_lexico_acentos *no=raiz;
	while (no!=NULL) {
		if (palavra[coluna] < no->carac) { no=no->menor; continue; }
		if (palavra[coluna] != no->carac) { no=no->maior; continue; }
		/* character matched at this position */
		if ((size_t)coluna==ultima) {
			if (no->lema_etq!=NULL) {
				strcpy(lema,no->lema_etq->lema);
				*etq_def=no->lema_etq->etq_def;
			}
			return;
		}
		if ((size_t)coluna>ultima) return;
		coluna++;
		no=no->igual;
	}
} /* pesq_lexico_acentos */

/* Loads the file "locucoes.let" (located under local_do_arquivo) into the
   tree rooted at raiz_locucoes.
   Each line is expected to be "expression tag"; only the first two characters
   of the tag field are converted to a number.
   Lines with fewer than two fields are skipped. Exits with status 1 if the
   file cannot be opened. */
void le_locucoes ( ) {
	/* scan buffers are as large as the line buffer, so an unbounded %s cannot overflow them */
	char estalinha[TAMLINHA], palavra_locucoes[TAMLINHA], s_etq_locucoes[TAMLINHA];
	char arquivo[TAMLINHA];
	int etq_locucoes, tam;
	FILE *locucoes;
	raiz_locucoes=NULL;
	/* build the path with a bounded write; a truncated path is treated as unopenable */
	tam=snprintf(arquivo,sizeof arquivo,"%s%s",local_do_arquivo,"locucoes.let");
	if (tam<0 || (size_t)tam>=sizeof arquivo) locucoes=NULL;
	else locucoes=fopen(arquivo,"r");
	if (locucoes==NULL) { printf("\nImpossivel abrir arquivo locucoes.let"); exit(1); }
	while (fgets(estalinha,TAMLINHA,locucoes)) {
		/* estalinha (example) = expression tag_0_to_20 */
		if (sscanf(estalinha,"%s %s",palavra_locucoes,s_etq_locucoes)!=2) continue;
		s_etq_locucoes[2]='\0';
		etq_locucoes=atoi(s_etq_locucoes);
		/* insert into the tree of multi-word expressions */
		insere_arv_locucoes (palavra_locucoes, 0, etq_locucoes, &raiz_locucoes);
	}
	fclose(locucoes);
} /* le_locucoes */

/* Inserts palavra (from position coluna onwards) into the ternary search tree
   at *raiz, creating nodes as needed. Nodes are created with
   etq_def=ETIQUETAINDEFINIDA; the node of the last character receives
   etq_locucoes unless it already holds a defined tag (the first entry wins).
   An empty word, or a coluna outside the word, inserts nothing.
   Exits with status 1 if memory cannot be allocated. */
void insere_arv_locucoes (char *palavra, int coluna, int etq_locucoes, carac_locucoes **raiz) {
	const size_t tam=strlen(palavra);
	carac_locucoes *no;
	/* guards the "length - 1" computation below against wrapping around */
	if (coluna<0 || (size_t)coluna>=tam) return;
	for (;;) {
		no=*raiz;
		if (no==NULL) {
			no=malloc(sizeof *no);
			if (no==NULL) { fprintf(stderr,"\nMemoria insuficiente"); exit(1); }
			no->carac=palavra[coluna];
			no->etq_def=ETIQUETAINDEFINIDA;
			no->menor=NULL;
			no->igual=NULL;
			no->maior=NULL;
			*raiz=no;
		}
		if (palavra[coluna] < no->carac) raiz=&no->menor;
		else if (palavra[coluna] > no->carac) raiz=&no->maior;
		else if ((size_t)coluna==tam-1) {
			/* last character: record the tag if none is defined yet */
			if (no->etq_def==ETIQUETAINDEFINIDA) no->etq_def=etq_locucoes;
			return;
		}
		else {
			coluna++;
			raiz=&no->igual;
		}
	}
} /* insere_arv_locucoes */

/* Searches the tree raiz for the longest prefix of palavras (starting at
   coluna) that ends at a boundary: either the end of the string or a position
   followed by '='.
   At an '=' boundary the position is accepted when the node has no igual
   child, or when the first node of its igual subtree is '='.
   Every accepted boundary updates the global maior_coluna. The function
   returns the tag of the deepest accepted boundary whose tag is defined, or
   ETIQUETAINDEFINIDA when there is none. */
int pesq_locucoes(char *palavras, int coluna, carac_locucoes *raiz) {
	const size_t ultima=strlen(palavras)-1; /* index of the last character */
	int achada=ETIQUETAINDEFINIDA;
	carac_locucoes *no=raiz;
	while (no!=NULL) {
		int fronteira;
		if (palavras[coluna] < no->carac) { no=no->menor; continue; }
		if (palavras[coluna] != no->carac) { no=no->maior; continue; }
		/* character matched: decide whether this position closes an expression */
		if ((size_t)coluna==ultima) fronteira=1;
		else if (palavras[coluna+1]=='=') fronteira=(no->igual==NULL || no->igual->carac=='=');
		else fronteira=0;
		if (fronteira) {
			maior_coluna=coluna;
			/* a deeper defined tag replaces a shallower one; undefined tags never do */
			if (no->etq_def!=ETIQUETAINDEFINIDA) achada=no->etq_def;
		}
		if ((size_t)coluna>=ultima) break;
		coluna++;
		no=no->igual;
	}
	return(achada);
} /* pesq_locucoes */

/* Loads the two suffix files found under local_do_arquivo into the tree
   rooted at raiz_sufixos: first "sufixos.let" (exato=0, suffixes from the
   corpus), then "sufixato.let" (exato=1, suffixes from the lexicon, with a
   single morphological category).
   Each line holds a suffix, one separator character and the tag/frequency/
   lemma records, which are handed unparsed to insere_arv_sufixos.
   Blank lines are skipped; only real line terminators are stripped, so a
   final line without a newline keeps its last character.
   Exits with status 1 if a file cannot be opened. */
void le_sufixos ( ) {
	/* sufixo is as large as the line buffer, so an unbounded %s cannot overflow it */
	char estalinha[TAMLINHA], sufixo[TAMLINHA], arquivo[TAMLINHA];
	char *etqlema_sufixos;
	const char *nome;
	int exato, tam, fim;
	FILE *sufixos;
	raiz_sufixos=NULL;
	for (exato=0;exato<=1;exato++) {
		nome = exato ? "sufixato.let" : "sufixos.let";
		/* build the path with a bounded write */
		tam=snprintf(arquivo,sizeof arquivo,"%s%s",local_do_arquivo,nome);
		if (tam<0 || (size_t)tam>=sizeof arquivo) { printf("\nImpossivel abrir arquivo %s%s",local_do_arquivo,nome); exit(1); }
		sufixos=fopen(arquivo,"r");
		if (sufixos==NULL) { printf("\nImpossivel abrir arquivo %s",arquivo); exit(1); }
		while (fgets(estalinha,TAMLINHA,sufixos)) {
			/* estalinha (example) = "aces 020.11!seco!090.02!secar!170.77!seca!190.10!secar!" */
			estalinha[strcspn(estalinha,"\r\n")]='\0';
			fim=0;
			if (sscanf(estalinha,"%s%n",sufixo,&fim)!=1) continue;
			/* the records start one separator after the suffix, when there is anything left */
			etqlema_sufixos=estalinha+fim;
			if (*etqlema_sufixos!='\0') etqlema_sufixos++;
			/* insert into the suffix tree */
			insere_arv_sufixos (sufixo, 0, exato, etqlema_sufixos, &raiz_sufixos);
		}
		fclose(sufixos);
	}
} /* le_sufixos */

/* Parses the records of etqlema_sufixos and stores them in node no.
   Record layout: 2 characters of tag, 4 characters of frequency, one
   separator, the lemma, one closing '!'
   (example: "020.11!seco!090.02!secar!").
   With exato set, the tag also becomes the node's etq_exato; without it, a
   record whose tag equals the node's etq_exato is skipped.
   Records with a tag outside the node's tables are ignored and over-long
   lemmas are truncated to TAMPAL-1 characters. */
static void aplica_etqlema_sufixos (carac_sufixos *no, int exato, const char *etqlema_sufixos) {
	const int total_etq=(int)(sizeof no->etq/sizeof no->etq[0]);
	const size_t tam=strlen(etqlema_sufixos);
	size_t x=0, y;
	char etq[3], freq[5], sufixlema[TAMPAL];
	int i;
	while (x+6<=tam) {
		memcpy(etq,etqlema_sufixos+x,2); etq[2]='\0';
		memcpy(freq,etqlema_sufixos+x+2,4); freq[4]='\0';
		x+=6;
		if (x<tam) x++; /* skip the separator before the lemma */
		y=0;
		while (x<tam && etqlema_sufixos[x]!='!') {
			if (y<TAMPAL-1) sufixlema[y++]=etqlema_sufixos[x];
			x++;
		}
		if (x<tam) x++; /* skip the closing '!' */
		sufixlema[y]='\0';
		/* the whole record has been consumed, so skipping it keeps the parser in step */
		i=atoi(etq);
		if (i<0 || i>=total_etq) continue;
		if (exato) no->etq_exato=i;
		else if (no->etq_exato==i) continue;
		no->etq[i]=atof(freq);
		if (sufixlema[0]!='\0') {
			char *novo=malloc(strlen(sufixlema)+1);
			if (novo==NULL) { fprintf(stderr,"\nMemoria insuficiente"); exit(1); }
			strcpy(novo,sufixlema);
			free(no->lema[i]); /* release a lemma stored by an earlier line */
			no->lema[i]=novo;
		}
	}
} /* aplica_etqlema_sufixos */

/* Inserts the suffix palavra (from position coluna onwards) into the ternary
   search tree at *raiz, creating nodes as needed, and stores the records of
   etqlema_sufixos in the node of its last character. exato tells whether the
   line comes from the single-category suffix file.
   An empty suffix, or a coluna outside it, inserts nothing.
   Exits with status 1 if memory cannot be allocated. */
void insere_arv_sufixos (char *palavra, int coluna, int exato, char *etqlema_sufixos, carac_sufixos **raiz) {
	const size_t tam=strlen(palavra);
	carac_sufixos *no;
	int x;
	/* guards the "length - 1" computation below against wrapping around */
	if (coluna<0 || (size_t)coluna>=tam) return;
	for (;;) {
		no=*raiz;
		if (no==NULL) {
			no=malloc(sizeof *no);
			if (no==NULL) { fprintf(stderr,"\nMemoria insuficiente"); exit(1); }
			no->carac=palavra[coluna];
			for (x=0;x<(int)(sizeof no->etq/sizeof no->etq[0]);x++) {
				no->lema[x]=NULL;
				no->etq[x]=0.0;
			}
			no->etq_exato=ETIQUETAINDEFINIDA;
			no->menor=NULL;
			no->igual=NULL;
			no->maior=NULL;
			*raiz=no;
		}
		if (palavra[coluna] < no->carac) raiz=&no->menor;
		else if (palavra[coluna] > no->carac) raiz=&no->maior;
		else if ((size_t)coluna==tam-1) {
			aplica_etqlema_sufixos(no,exato,etqlema_sufixos);
			return;
		}
		else {
			coluna++;
			raiz=&no->igual;
		}
	}
} /* insere_arv_sufixos */

/* Follows palavra (from position coluna) through the suffix tree raiz as far
   as it matches. Every matched character updates the globals maior_coluna
   (its position) and maior_sufixo (its node), so after the call they describe
   the longest match found; they are left untouched when nothing matches.
   Line format of the source data, for reference:
   aces 020.11!seco!090.02!secar!170.77!seca!190.10!secar! */
void pesq_sufixos(char *palavra, int coluna, carac_sufixos *raiz) {
	const size_t ultima=strlen(palavra)-1; /* index of the last character */
	carac_sufixos *no;
	for (no=raiz;no!=NULL;) {
		if (palavra[coluna] < no->carac) no=no->menor;
		else if (palavra[coluna] != no->carac) no=no->maior;
		else {
			maior_coluna=coluna;
			maior_sufixo=no;
			if ((size_t)coluna>=ultima) break;
			coluna++;
			no=no->igual;
		}
	}
} /* pesq_sufixos */

/* Fills the globals etq_do_sufixo[] and lema_do_sufixo[] for palavra from the
   longest suffix match recorded in maior_sufixo / maior_coluna.
   corte is the number of leading characters of palavra not covered by the
   match (looking up "decantou" and finding "cantou" gives corte 2, "de";
   a full match gives corte 0).
   - Full match on a node with an exact tag: that tag gets probability 1.0 and
     the node's lemma, when it has one.
   - Otherwise, for every tag with a positive frequency in the node:
       * with a lemma: the frequency is kept and the lemma is the uncovered
         prefix (lower-cased, accents removed) plus the node's lemma;
       * without a lemma: the frequency is halved and the nearest deeper node
         having a lemma for the same tag is used, dropping as many leading
         lemma characters as levels descended. */
void trata_maior_sufixo (char *palavra) {
	char parte_inicial[TAMPAL];
	int x,nivel,corte,achoumaiorsufixoproximo;
	carac_sufixos *aux, *auxigual;
	corte=((int)strlen(palavra)-1)-maior_coluna;
	if (maior_sufixo->etq_exato!=ETIQUETAINDEFINIDA && corte==0) {
		etq_do_sufixo[maior_sufixo->etq_exato]=1.0;
		/* an exact tag may have been stored without a lemma */
		if (maior_sufixo->lema[maior_sufixo->etq_exato]!=NULL)
			strcpy(lema_do_sufixo[maior_sufixo->etq_exato],maior_sufixo->lema[maior_sufixo->etq_exato]);
		return;
	}
	/* keep the prefix copy inside parte_inicial */
	if (corte<0) corte=0;
	if (corte>TAMPAL-1) corte=TAMPAL-1;
	memcpy(parte_inicial,palavra,(size_t)corte);
	parte_inicial[corte]='\0';
	minusculasemacento(parte_inicial);
	for (x=0;x<21;x++) {
		if (!(maior_sufixo->etq[x]>0.0)) continue;
		etq_do_sufixo[x]=maior_sufixo->etq[x];
		if (maior_sufixo->lema[x]!=NULL) {
			strcpy(lema_do_sufixo[x],parte_inicial);
			strcat(lema_do_sufixo[x],maior_sufixo->lema[x]);
			/* joins "de" with "cantar" for "decantou" */
			/* or joins nothing with "cantar" for "cantou" */
			continue;
		}
		/* no lemma for this tag in the longest suffix: look for the nearest
		   deeper suffix with the same tag and a lemma */
		achoumaiorsufixoproximo=0;
		etq_do_sufixo[x]=(maior_sufixo->etq[x])/2;
		auxigual=maior_sufixo->igual;
		nivel=0;
		while (auxigual) {
			nivel++;
			if (auxigual->etq[x]>0.0 && auxigual->lema[x]!=NULL) { achoumaiorsufixoproximo=1; break; }
			/* scan the chain of smaller siblings */
			for (aux=auxigual->menor;aux;aux=aux->menor) {
				if (aux->etq[x]>0.0 && aux->lema[x]!=NULL) { achoumaiorsufixoproximo=1; auxigual=aux; break; }
			}
			if (achoumaiorsufixoproximo) break;
			/* scan the chain of greater siblings */
			for (aux=auxigual->maior;aux;aux=aux->maior) {
				if (aux->etq[x]>0.0 && aux->lema[x]!=NULL) { achoumaiorsufixoproximo=1; auxigual=aux; break; }
			}
			if (achoumaiorsufixoproximo) break;
			auxigual=auxigual->igual;
		}
		/* only use the lemma when it is long enough to drop nivel characters from it */
		if (achoumaiorsufixoproximo && (size_t)nivel<=strlen(auxigual->lema[x])) {
			strcpy(lema_do_sufixo[x],parte_inicial);
			strcat(lema_do_sufixo[x],(auxigual->lema[x])+nivel);
		}
	}
} /* trata_maior_sufixo */

void combinatracao(char palavra_ant[TAMPAL], char epalavrae_ant[TAMPAL], char lema_ant[TAMPAL], int etq_ant, char palavra_pos[TAMPAL], char epalavrae_pos[TAMPAL], char lema_pos[TAMPAL], int etq_pos) {
	int x;
	/* tqLeitura fica com a parte anterior da combinatracao */
	if (etq_ant!=ETIQUETAINDEFINIDA) sprintf(tqLeitura->saida,"%s %s %s\n",palavra_ant,lema_ant,etq_2letras[etq_ant]);
	strcpy(tqLeitura->palavra,palavra_ant);
	if (tqLeitura->maius) tqLeitura->palavra[0]=letra_maiuscula(tqLeitura->palavra[0]);
	strcpy(tqLeitura->lema,lema_ant);
	strcpy(tqLeitura->epalavrae,epalavrae_ant);
	tqLeitura->etq_def=etq_ant;
	/* tqLeitura->prox fica com a parte posterior da combinatracao */
	sprintf(tqLeitura->prox->saida,"%s %s %s\n",palavra_pos,lema_pos,etq_2letras[etq_pos]);
	tqLeitura->prox->grava=CONTINUAGRAVA;
	strcpy(tqLeitura->prox->palavra,palavra_pos);
	strcpy(tqLeitura->prox->lema,lema_pos);
	strcpy(tqLeitura->prox->epalavrae,epalavrae_pos);
	tqLeitura->prox->etq_def=etq_pos;
	for (x=0;x<21;x++) { tqLeitura->prox->ant[x]=0.00; tqLeitura->prox->etq_sfx[x]=0.00; tqLeitura->prox->pos[x]=0.00; tqLeitura->prox->lema_sfx[x][0]='\0'; }
	houveComb=1;
} /* combinatracao */

/*void pegaIdDoc (char *nomeDoDoc) {
	long tipoDoc;
	char mes[4], dia[4], numDocNoDia[6];
	int nmes,ndia,numero;
	if (nomeDoDoc[0]=='P') {
		if (nomeDoDoc[11]=='5') {
			tipoDoc=0;
			mes[0]=nomeDoDoc[12];
			mes[1]=nomeDoDoc[13];
			mes[2]='\0';
			dia[0]=nomeDoDoc[14];
			dia[1]=nomeDoDoc[15];
			dia[2]='\0';
			numDocNoDia[0]=nomeDoDoc[17];
			numDocNoDia[1]=nomeDoDoc[18];
			numDocNoDia[2]=nomeDoDoc[19];
			numDocNoDia[3]='\0';
		}
		else {
			tipoDoc=10000000;
			mes[0]=nomeDoDoc[12];
			mes[1]=nomeDoDoc[13];
			mes[2]='\0';
			dia[0]=nomeDoDoc[14];
			dia[1]=nomeDoDoc[15];
			dia[2]='\0';
			numDocNoDia[0]=nomeDoDoc[17];
			numDocNoDia[1]=nomeDoDoc[18];
			numDocNoDia[2]=nomeDoDoc[19];
			numDocNoDia[3]='\0';
		}
	}
	else {
		if (nomeDoDoc[4]=='5') {
			tipoDoc=20000000;
			mes[0]=nomeDoDoc[5];
			mes[1]=nomeDoDoc[6];
			mes[2]='\0';
			dia[0]=nomeDoDoc[7];
			dia[1]=nomeDoDoc[8];
			dia[2]='\0';
			numDocNoDia[0]=nomeDoDoc[10];
			numDocNoDia[1]=nomeDoDoc[11];
			numDocNoDia[2]=nomeDoDoc[12];
			numDocNoDia[3]='\0';
		}
		else {
			tipoDoc=30000000;
			mes[0]=nomeDoDoc[5];
			mes[1]=nomeDoDoc[6];
			mes[2]='\0';
			dia[0]=nomeDoDoc[7];
			dia[1]=nomeDoDoc[8];
			dia[2]='\0';
			numDocNoDia[0]=nomeDoDoc[10];
			numDocNoDia[1]=nomeDoDoc[11];
			numDocNoDia[2]=nomeDoDoc[12];
			numDocNoDia[3]='\0';
		}
	}
	nmes=atoi(mes);
	ndia=atoi(dia);
	numero=atoi(numDocNoDia);
	id_doc=tipoDoc + 100000*(long)nmes + 1000*(long)ndia + (long)numero;
} /* pegaIdDoc usado para as colecoes  publico95 ,publico94, Folha95 e Folha94 */

/* Stores in the global id_doc the decimal number at the start of nomeDoDoc
   (0 when there is none). strtol is used because id_doc is written out as a
   long elsewhere in the file, and strtol has defined behaviour over the whole
   long range. */
void pegaIdDoc (char *nomeDoDoc) {
	id_doc=strtol(nomeDoDoc,NULL,10);
} /* pegaIdDoc used for folha94formatado.txt */

/* Converts palavra in place to lower case without diacritics: each accented
   letter listed below, upper or lower case, becomes its plain lower-case base
   letter; every other character goes through tolower(). */
void minusculasemacento (char *palavra) {
	size_t x;
	const size_t tam=strlen(palavra);
	for (x=0;x<tam;x++) {
		switch (palavra[x]) {
			case 'Á': case 'À': case 'Â': case 'Ã': case 'Ä':
			case 'á': case 'à': case 'â': case 'ã': case 'ä':
				palavra[x]='a'; break;
			case 'É': case 'Ê': case 'È': case 'Ë':
			case 'é': case 'ê': case 'è': case 'ë':
				palavra[x]='e'; break;
			case 'Í': case 'Ì': case 'Î': case 'Ï':
			case 'í': case 'ì': case 'î': case 'ï':
				palavra[x]='i'; break;
			case 'Ó': case 'Ô': case 'Õ': case 'Ò': case 'Ö':
			case 'ó': case 'ô': case 'õ': case 'ò': case 'ö':
				palavra[x]='o'; break;
			case 'Ú': case 'Ù': case 'Û': case 'Ü':
			case 'ú': case 'ù': case 'û': case 'ü':
				palavra[x]='u'; break;
			case 'Ç': case 'ç':
				palavra[x]='c'; break;
			case 'Ñ': case 'ñ':
				palavra[x]='n'; break;
			case 'Ý': case 'ý':
				palavra[x]='y'; break;
			/* tolower() requires a value representable as unsigned char */
			default : palavra[x] = (char)tolower((unsigned char)palavra[x]);
		}
	}
} /* minusculasemacento */

/* Converts palavra in place to lower case, keeping diacritics: each accented
   capital listed below becomes its accented lower-case form; every other
   character goes through tolower(). */
void minuscula (char *palavra) {
	size_t x;
	const size_t tam=strlen(palavra);
	for (x=0;x<tam;x++) {
		switch (palavra[x]) {
			case 'Á': palavra[x]='á'; break;
			case 'À': palavra[x]='à'; break;
			case 'Â': palavra[x]='â'; break;
			case 'Ã': palavra[x]='ã'; break;
			case 'É': palavra[x]='é'; break;
			case 'Ê': palavra[x]='ê'; break;
			case 'Í': palavra[x]='í'; break;
			case 'Ó': palavra[x]='ó'; break;
			case 'Ô': palavra[x]='ô'; break;
			case 'Õ': palavra[x]='õ'; break;
			case 'Ú': palavra[x]='ú'; break;
			case 'Ç': palavra[x]='ç'; break;
			case 'Ñ': palavra[x]='ñ'; break;
			case 'È': palavra[x]='è'; break;
			case 'Ì': palavra[x]='ì'; break;
			case 'Ò': palavra[x]='ò'; break;
			case 'Ù': palavra[x]='ù'; break;
			case 'Û': palavra[x]='û'; break;
			case 'Î': palavra[x]='î'; break;
			case 'Ä': palavra[x]='ä'; break;
			case 'Ë': palavra[x]='ë'; break;
			case 'Ï': palavra[x]='ï'; break;
			case 'Ö': palavra[x]='ö'; break;
			case 'Ü': palavra[x]='ü'; break;
			case 'Ý': palavra[x]='ý'; break;
			/* tolower() requires a value representable as unsigned char */
			default : palavra[x] = (char)tolower((unsigned char)palavra[x]);
		}
	}
} /* minuscula */

/* Returns the lower-case form of letra: the accented capitals listed below
   map to their accented lower-case forms; any other character goes through
   tolower(). */
char letra_minuscula (char letra) {
	switch (letra) {
		case 'Á': letra='á'; break;
		case 'À': letra='à'; break;
		case 'Â': letra='â'; break;
		case 'Ã': letra='ã'; break;
		case 'É': letra='é'; break;
		case 'Ê': letra='ê'; break;
		case 'Í': letra='í'; break;
		case 'Ó': letra='ó'; break;
		case 'Ô': letra='ô'; break;
		case 'Õ': letra='õ'; break;
		case 'Ú': letra='ú'; break;
		case 'Ç': letra='ç'; break;
		case 'Ñ': letra='ñ'; break;
		case 'È': letra='è'; break;
		case 'Ì': letra='ì'; break;
		case 'Ò': letra='ò'; break;
		case 'Ù': letra='ù'; break;
		case 'Û': letra='û'; break;
		case 'Î': letra='î'; break;
		case 'Ä': letra='ä'; break;
		case 'Ë': letra='ë'; break;
		case 'Ï': letra='ï'; break;
		case 'Ö': letra='ö'; break;
		case 'Ü': letra='ü'; break;
		case 'Ý': letra='ý'; break;
		/* tolower() requires a value representable as unsigned char */
		default : letra = (char)tolower((unsigned char)letra);
	}
	return(letra);
} /* letra_minuscula */

/* Returns the upper-case form of letra: the accented lower-case letters
   listed below map to their accented capitals; any other character goes
   through toupper(). */
char letra_maiuscula (char letra) {
	switch (letra) {
		case 'á': letra='Á'; break;
		case 'à': letra='À'; break;
		case 'â': letra='Â'; break;
		case 'ã': letra='Ã'; break;
		case 'é': letra='É'; break;
		case 'ê': letra='Ê'; break;
		case 'í': letra='Í'; break;
		case 'ï': letra='Ï'; break;
		case 'ó': letra='Ó'; break;
		case 'ô': letra='Ô'; break;
		case 'õ': letra='Õ'; break;
		case 'ú': letra='Ú'; break;
		case 'ç': letra='Ç'; break;
		case 'ñ': letra='Ñ'; break;
		case 'è': letra='È'; break;
		case 'ì': letra='Ì'; break;
		case 'ò': letra='Ò'; break;
		case 'î': letra='Î'; break;
		case 'ù': letra='Ù'; break;
		case 'û': letra='Û'; break;
		case 'ä': letra='Ä'; break;
		case 'ë': letra='Ë'; break;
		case 'ö': letra='Ö'; break;
		case 'ü': letra='Ü'; break;
		case 'ý': letra='Ý'; break;
		/* toupper() requires a value representable as unsigned char */
		default : letra = (char)toupper((unsigned char)letra);
	}
	return (letra);
} /* letra_maiuscula */
